# Explore topic 13a ----
#
# Topic 13a looks at generating a confidence interval for a population
# mean when we know the population standard deviation.

# Load every helper function used by this script, in the original order.
# source() evaluates each file in the global environment by default, so
# the helpers become available at top level.
invisible(lapply(
  c(
    "../gnrnd5.R",
    "../gnrnd4.R",
    "../pop_sd.R",
    "../assess_normality.R",
    "../ci_known.R"
  ),
  source
))

# Generate a population. The script reads the result from the global
# variable L1 right after the call, so gnrnd5 is expected to leave its
# values there.
gnrnd5(145209499902, 3306001054)
big_pop <- L1

# The method in this topic needs the population standard deviation.
hold_pop_sd <- pop_sd(big_pop)

# Choose the size of the sample we are going to take.
samp_size <- 38

# So that everyone gets the same sample, generate the index values for a
# sample of that size from a key built out of the sample size. gnrnd4 is
# expected to overwrite L1 with those index values.
key1 <- 702370001 + 100 * (samp_size - 1)
gnrnd4(key1, 500000001)
L1
this_sample <- big_pop[L1]

# Look at our sample.
this_sample

# Sample mean and sample standard deviation.
samp_mean <- mean(this_sample)
samp_sd <- sd(this_sample)
samp_mean
samp_sd

# From here on we use the population standard deviation: the distribution
# of the sample means is taken to be Normal, with the same mean as the
# population and a standard deviation equal to the population standard
# deviation divided by the square root of the sample size.
# Build the confidence interval by hand ----

# Pick the confidence level for the interval.
conf_level <- 0.95

# The interval leaves out 1 - conf_level of the distribution; split that
# evenly, one half in each tail. NOTE: despite its name, z_over_2 holds
# the tail AREA (alpha / 2), not a z value.
z_over_2 <- (1 - conf_level) / 2
z_over_2

# The interval is
#   samp_mean +/- z(alpha / 2) * pop_sd / sqrt(samp_size)
# where pop_sd is the KNOWN population standard deviation.
z_score_low <- qnorm(z_over_2)
z_score_low
# Same area taken from the upper tail: the opposite of the low value.
z_score_high <- qnorm(z_over_2, lower.tail = FALSE)
z_score_high

# Standard error of the sample mean, from the population sd.
st_error <- hold_pop_sd / sqrt(samp_size)
st_error

# CI low value
samp_mean + z_score_low * st_error
# CI high value
samp_mean + z_score_high * st_error

# Alternatively, find the margin of error first ...
MOE <- z_score_high * st_error
MOE
# ... and then the limits of the confidence interval.
samp_mean - MOE # the low end
samp_mean + MOE # the high end

# Of course, all of this can be done in one step via our ci_known function.
ci_known(hold_pop_sd, samp_size, samp_mean, conf_level)

# To try a different confidence level, change the value assigned to
# conf_level at the top of this section and rerun the statements that
# follow it, or skip straight to the ci_known() call above to get the new
# values.

# If we express the confidence level as a percent, then that percent of
# the confidence intervals generated with this methodology will contain
# the true mean. So, at this point in running the script, we do not know
# whether the 95% confidence interval that we generated (reported in the
# original notes as (173.464, 213.299)) does or does not contain the true
# mean.
#
# Find the true mean and compare it with the interval printed above.
true_mean <- mean(big_pop)
true_mean
# The original notes for this run record that the true mean is inside
# the interval.
# Coverage simulation ----
#
# The single interval above was an illustration. Here we repeat the
# process many times and count how many of the intervals generated this
# way contain the true mean.
#
# Requires from earlier in the script: big_pop, hold_pop_sd, true_mean,
# and the ci_known() helper.

# Reset the confidence level and sample size here, and set the number of
# repetitions, so that all three are easy to change for a later rerun.
conf_level <- 0.95
samp_size <- 38
num_trials <- 10000

# L3 records the outcome of each trial. It is preallocated as a character
# vector because it stores the labels "hit" / "missed"; starting from an
# integer vector would force a silent type conversion on the first write.
L3 <- character(num_trials)

for (i in seq_len(num_trials)) {
  # Draw a fresh random sample and build its confidence interval.
  this_sample <- sample(big_pop, samp_size)
  this_ci <- ci_known(hold_pop_sd, samp_size, mean(this_sample), conf_level)

  # The first two elements of the ci_known() result are used as the lower
  # and upper limits of the interval. The condition is a single TRUE/FALSE
  # test, so the scalar operator && is the right one.
  if (this_ci[1] <= true_mean && true_mean <= this_ci[2]) {
    L3[i] <- "hit"
  } else {
    L3[i] <- "missed"
  }
}

# See how we did.
table(L3)

# To do this again with other settings, change conf_level, samp_size
# and/or num_trials at the top of this section and rerun from there.